The PGC3 MDD study is including the following analyses to identify genes associated with MDD:


Read in results from all analyses

Show code
library(biomaRt)
library(GenomicRanges)
# Insert nearest gene information
gene_attributes = c('ensembl_gene_id', 'hgnc_symbol', 'external_gene_name','chromosome_name','start_position','end_position')
# GRCh37 for position
ensembl37 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl", GRCh=37)
Genes37<-getBM(attributes=gene_attributes, mart = ensembl37)
# remove alternative contigs
Genes37 <- Genes37[Genes37$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes37$cpid <- with(Genes37, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes37 <- Genes37[!duplicated(Genes37$ensembl_gene_id),]
Genes37 <- Genes37[!duplicated(Genes37$cpid),]
Genes37 <- Genes37[!duplicated(Genes37$external_gene_name),]

# GRCH38 for gene names
ensembl38 = useEnsembl(biomart="ensembl", dataset="hsapiens_gene_ensembl")
Genes38<-getBM(attributes=gene_attributes, mart = ensembl38)
# remove alternative contigs
Genes38 <- Genes38[Genes38$chromosome_name %in% c(as.character(1:22), 'X'),]
# remove duplicated entries
Genes38$cpid <- with(Genes38, paste0(chromosome_name, ':', start_position, '-', end_position))
Genes38 <- Genes38[!duplicated(Genes38$ensembl_gene_id),]
Genes38 <- Genes38[!duplicated(Genes38$cpid),]
#no_gene_name38 <- which(Genes38$external_gene_name == "")
#Genes38$external_gene_name[no_gene_name38] = Genes38$cpid[no_gene_name38]
#Genes38 <- Genes38[!duplicated(Genes38$external_gene_name),]

# 37 positions with 38 gene names
Genes <- merge(Genes37, Genes38[,c('ensembl_gene_id', 'chromosome_name', 'external_gene_name', 'hgnc_symbol', 'start_position', 'end_position')], by=c('ensembl_gene_id'), all=TRUE, suffix=c('.37', '.38'))


# copy over build 37 gene name if it is missing in 38
coalesce_gene_name <- which(is.na(Genes$external_gene_name.38))
Genes$external_gene_name = with(Genes, ifelse(is.na(external_gene_name.38) | external_gene_name.38 == "", yes=external_gene_name.37, no=external_gene_name.38))
##########
# Nearest gene
##########

library(data.table)

# Read in GWAS results
# Currently we are using results only for lead indepdendant associations from COJO
# I think this is fine
indep_hits<-fread(here::here('docs/tables/meta_snps_full_eur.cojo.txt'))

# Link SNPs to nearest gene

window<-50000

for(i in 1:nrow(indep_hits)){
  Genes_i<-Genes[which(Genes$start_position.37 < (indep_hits$BP[i] + window) & Genes$end_position.37 > (indep_hits$BP[i] - window) & Genes$chromosome_name.37 == indep_hits$CHR[i]),]
  if(nrow(Genes_i) != 0){
    gene_string<-NULL
    for(j in 1:nrow(Genes_i)){
      if(indep_hits$BP[i] > Genes_i$start_position.37[j] & indep_hits$BP[i] < Genes_i$end_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=0,
                                                   Pos=NA))
      }
      if(indep_hits$BP[i] < Genes_i$start_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$start_position.37[j]),
                                                   Pos='-'))
      }
      if(indep_hits$BP[i] > Genes_i$end_position.37[j]){
        gene_string<-rbind(gene_string, data.frame(ID=Genes_i$external_gene_name[j],
                                                   ENSGID=Genes_i$ensembl_gene_id[j],
                                                   Dist=abs(indep_hits$BP[i] - Genes_i$end_position.37[j]),
                                                   Pos='+'))
      }
    }
    gene_string<-gene_string[order(gene_string$Dist),]
    indep_hits$NearestGene[i]<-as.character(gene_string$ID[1])
    indep_hits$NearestENSG[i]<-as.character(gene_string$ENSGID[1])
  } else {
    indep_hits$NearestGene[i]<-NA
    indep_hits$NearestENSG[i]<-NA
  }
}
##########
# SNP-finemapping
##########

# Read in finemapping results from Joni. This table shows genes implicated by the finemapping results, by a gene containing the entire 95% credible set.
finemap<-fread(here::here('docs/tables/finemapping/Locus_FineMapping_Full_Results.csv'))

# parse out gene names
finemap_genes<-unlist(strsplit(finemap$High.Confidence.Genes.Names, ','))
finemap_genes<-finemap_genes[finemap_genes != '-']
# parse out ensembl ids
finemap_geneids<-unlist(strsplit(finemap$High.Confidence.Genes.IDs, ','))
finemap_geneids<-finemap_geneids[finemap_geneids != '-']
finemap_geneids <- sapply(strsplit(finemap_geneids, '\\.'), function(g) g[[1]])
##########
# FastBAT
##########

# Read in FastBAT results
fastbat<-fread(here::here('docs/tables/fastBAT/mdd_fastbat_AllgeneMatrix.gene.fastbat'))
##########
# H-MAGMA
##########

hmagma<-fread(here::here('docs/tables/H-MAGMA/PGC_MDD_Results_mar2022.csv'))

# Exclude astrocytes
hmagma_noAstr<-hmagma[hmagma$Analysis != 'Astrocytes',]

# Apply FDR correction across all tests
hmagma_noAstr$P.FDR<-p.adjust(hmagma_noAstr$P, method = 'fdr')

hmagma_noAstr<-merge(hmagma_noAstr, Genes, by.x='GENE', by.y='ensembl_gene_id')
##########
# TWAS
##########

twas<-fread(here::here('docs/tables/twas/PGC_MDD3_twas_AllTissues_GW.txt'))
twas$chromosome_name <- as.character(twas$CHR)
twas$twas_id <- 1:nrow(twas)

# merge by scaffold (exact overlap)
twas_gr <- with(twas, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
genes_gr <- with(Genes[!is.na(Genes$chromosome_name.37),], GRanges(seqnames=chromosome_name.37, ranges=IRanges(start=start_position.37, end=end_position.37)))

twas_genes_overlaps <- findOverlaps(twas_gr, genes_gr, type='equal')

twas_scaffolds <- twas[twas_genes_overlaps@from,]
twas_scaffolds$ensembl_gene_id <- Genes$ensembl_gene_id[twas_genes_overlaps@to]
twas_scaffolds$external_gene_name.37 <- Genes$external_gene_name.37[twas_genes_overlaps@to]

# merge unmatched twas results by symbol and partial overlap
twas_unmatched <- twas[!twas$twas_id %in% twas_scaffolds$twas_id,]
twas_unmatched_gr <- with(twas_unmatched, GRanges(seqnames=chromosome_name, ranges=IRanges(start=P0, end=P1)))
# find overlaps
twas_unmatched_genes_overlaps <- findOverlaps(twas_unmatched_gr, genes_gr, maxgap=window)
# merge in gene names
twas_symbols <- twas_unmatched[twas_unmatched_genes_overlaps@from,]
twas_symbols$ensembl_gene_id <-  Genes$ensembl_gene_id[twas_unmatched_genes_overlaps@to]
twas_symbols$external_gene_name.37 <- Genes$external_gene_name.37[twas_unmatched_genes_overlaps@to]
# keep matches with symbols match
twas_matched_symbols <- twas_symbols[which(twas_symbols$ID == twas_symbols$external_gene_name.37),]

twas_genes <- rbind(twas_scaffolds, twas_matched_symbols)

twas_sig<-twas_genes[twas_genes$TWAS.P < 3.685926e-08,]
##########
# Colocalisation
##########

coloc<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS_colocalisation.csv'))
coloc_sig<-coloc[coloc$COLOC.PP4 > 0.8,]

coloc_sig <- merge(coloc_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# FOCUS 
##########

focus<-read.csv(here::here('docs/tables/twas/PGC_MDD3_TWAS.TWSig.FOCUS.results.csv'))

fusion <- fread(here::here("docs/tables/twas/PGC_MDD3_twas_AllTissues_TWSig_CLEAN.txt"))
fusion<-fusion[,c('PANEL','PANEL_clean_short','PANEL_clean'), with=F]
fusion<-fusion[!duplicated(fusion),]

focus<-merge(focus, fusion, by.x='SNP.weight.Set', by.y='PANEL_clean_short')

focus_sig<-focus[focus$FOCUS_pip > 0.5,]

focus_sig <- merge(focus_sig, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# Expression analysis based high confidence genes
##########

expression_highconf_res<-fread(here::here('docs/tables/twas/PGC3_MDD_TWAS_HighConf_results.csv'))

expression_highconf_res <- merge(expression_highconf_res, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
##########
# SMR
##########

# Read in the SMR results
smr_res<-list()

smr_res[['eQTLGen_Blood']]<-fread(here::here('docs/tables/twas/eqtlgen_smr_res_GW_withIDs.csv'))

names(smr_res[['eQTLGen_Blood']])[names(smr_res[['eQTLGen_Blood']]) == 'GeneSymbol']<-'HGNCName'
smr_res[['eQTLGen_Blood']]$eQTL_source<-'eQTLGen_Blood'

tissue<-c("Basalganglia","Cerebellum","Cortex","Hippocampus","Spinalcord")

for(tissue_i in tissue){
  smr_res[[tissue_i]]<-fread(here::here(paste0('docs/tables/twas/metabrain_',tissue_i,'_smr_res_GW_withIDs.csv')))
  smr_res[[tissue_i]]$eQTL_source<-paste0('MetaBrain_',tissue_i)
}

smr_res_dat<-do.call(rbind, smr_res)
smr_res_dat$p_SMR_fdr_all<-p.adjust(smr_res_dat$p_SMR, method="fdr")

smr_res_dat_sig<-smr_res_dat[smr_res_dat$p_SMR_fdr_all < 0.05,]
##########
# HEIDI
##########

heidi<-smr_res_dat_sig[smr_res_dat_sig$p_HEIDI > 0.05,]
##########
# PWAS
##########

# For no just read in the ROSMAP results
pwas<-NULL
for(i in 1:22){
  if(i != 6){
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
  } else {
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i))))
    pwas<-rbind(pwas, fread(here::here(paste0('docs/tables/pwas/PGC_MDD3_pwas_rosmap_chr',i,'.MHC'))))
  }
}

pwas$TWAS.P.FDR<-p.adjust(pwas$TWAS.P)
pwas$ID<-gsub('\\..*','', pwas$ID)

# Read in PWAS and SMR results for all significant ROSMAP PWAS assoc results
pwas_smr_rosmap_banner<-fread(here::here('docs/tables/pwas/rosmap_banner_pwas_smr_results.csv'))

pwas_smr_rosmap_banner <- merge(pwas_smr_rosmap_banner, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='ID', by.y='external_gene_name.37', all.x=TRUE)
########
# PsyOPS
########

psyops <- fread(here::here('docs/tables/psyops/psyops_full_eur.cojo.txt'))
psyops$psy_id <- 1:nrow(psyops)
psyops_genes37 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.37')], by.x='gene', by.y='external_gene_name.37')
psyops_genes38 <- merge(psyops, Genes[,c('ensembl_gene_id', 'external_gene_name.38')], by.x='gene', by.y='external_gene_name.38')
psyops_genesen <- merge(psyops, Genes[,c('ensembl_gene_id','external_gene_name')], by.x='gene', by.y='ensembl_gene_id')
psyops_genesen$ensembl_gene_id <- psyops_genesen$gene
psyops_genesen$external_gene_name <- NULL

psyops_genes <- unique(rbind(psyops_genes37, psyops_genes38, psyops_genesen))
psyops_genes <- psyops_genes[!duplicated(psyops_genes$psy_id),]

psyops_select <- psyops_genes[with(psyops_genes, which(extreme_pLI | brain_enriched_expression | neurodevelopmental_disorder)),]

psyops_prioritised<-NULL
for(i in unique(psyops_select$lead_variant)){
  tmp<-psyops_select[psyops_select$lead_variant == i,]
  tmp<-tmp[tmp$PsyOPS_score == max(tmp$PsyOPS_score),]
  tmp<-tmp[tmp$distance == min(tmp$distance),]
  psyops_prioritised<-rbind(psyops_prioritised, tmp)
}

Create UpSet plot

This plot will show the number of genes returned by each analysis and the overlap of each analysis

Show code
# Create data.frame listing genes with T/F indicating whether it was supported by each analysis 
gene_overlap<-list()
gene_overlap[['Fine-mapping']]<-finemap_geneids
gene_overlap[['Expression']]<-expression_highconf_res$ensembl_gene_id
gene_overlap[['Protein']]<-pwas_smr_rosmap_banner$ensembl_gene_id[which(pwas_smr_rosmap_banner$banner_replicated  & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)]
gene_overlap[['Nearest Gene']]<-indep_hits$NearestENSG[!is.na(indep_hits$NearestENSG)]
gene_overlap[['fastBAT']]<-fastbat$Gene[fastbat$Pvalue < 2e-6]
gene_overlap[['H-MAGMA']]<-unique(hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 3.73e-6])
gene_overlap[['PsyOPS']]<-psyops_prioritised$ensembl_gene_id

library(UpSetR)

png(here::here('docs/figures/gene_consensus_upset_dense.png'), units = 'px', res=300, height=1500, width=2500)

upset(fromList(gene_overlap), nsets=10, order.by = "freq")

dev.off()
## quartz_off_screen 
##                 2
gene_overlap_highconf <- gene_overlap[c('Fine-mapping','Expression', 'Protein', 'PsyOPS')]

png(here::here('docs/figures/gene_consensus_upset_highconf.png'), units = 'px', res=300, height=1400, width=1500)

upset(fromList(gene_overlap_highconf), nsets=10, order.by = "freq")

dev.off()
## quartz_off_screen 
##                 2
Show UpSet plot of genes across all analyses
High-confidence genes
High-confidence genes
Show UpSet plot of genes across high-confidence analyses
High-confidence genes
High-confidence genes

Compare high confidence genes across all analyses

Show code
# Define high confidence associations
# - Genes implicated by finemapping
# - Genes implicated by TWAS, colocalisation and FOCUS from any panel
# - Genes implicated by PWAS, SMR, coloc and HEIDI in ROSMAP and Banner
high_conf<-unique(unlist(gene_overlap[c('Fine-mapping','Expression', 'Protein')]))
ENSGIDs <- Genes[,c('ensembl_gene_id', 'external_gene_name')]
names(ENSGIDs) <- c('ENSGID', 'ID')
high_conf_tab <- merge(data.frame(ENSGID=high_conf), ENSGIDs)

# finemap
# Joni said he used the same gene list as David Howard (fastBAT)
high_conf_tab$finemap<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene | high_conf_tab$ENSGID[i] %in% finemap_geneids){
    if(high_conf_tab$ENSGID[i] %in% finemap_geneids){
      high_conf_tab$finemap[i]<-'Sig'
    } else {
      high_conf_tab$finemap[i]<-'Non-Sig'
    }
  }
}

# expression
high_conf_tab$expression<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ID[i] %in% twas$ID)){
    high_conf_tab$expression[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% expression_highconf_res$ID)){
      high_conf_tab$expression[i]<-'Non-Sig'
    } else {
      if(expression_highconf_res$`SNP-weight Set`[expression_highconf_res$ID == high_conf_tab$ID[i]] == "CMC DLPFC Splicing"){
        high_conf_tab$expression[i]<-'Sig'
      } else {
        if(expression_highconf_res$TWAS.Z[expression_highconf_res$ID == high_conf_tab$ID[i]] > 0){
            high_conf_tab$expression[i]<-'Pos'
        } else {
            high_conf_tab$expression[i]<-'Neg'
        }
      }
    }
  }
}

# protein
high_conf_tab$protein<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(!(high_conf_tab$ENSGID[i] %in% pwas$ID)){
    high_conf_tab$protein[i]<-'NA'
  } else {
    if(!(high_conf_tab$ID[i] %in% pwas_smr_rosmap_banner$ID[which(pwas_smr_rosmap_banner$banner_replicated & pwas_smr_rosmap_banner$rosmap.causal & pwas_smr_rosmap_banner$smr.causal)])){
      high_conf_tab$protein[i]<-'Non-Sig'
    } else {
      if(pwas_smr_rosmap_banner$rosmap.TWAS.Z[pwas_smr_rosmap_banner$ID == high_conf_tab$ID[i]] > 0){
          high_conf_tab$protein[i]<-'Pos'
      } else {
          high_conf_tab$protein[i]<-'Neg'
      }
    }
  }
}

# fastBAT
high_conf_tab$fastBAT<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% fastbat$Gene){
    if(high_conf_tab$ENSGID[i] %in% fastbat$Gene[fastbat$Pvalue < 2e-6]){
      high_conf_tab$fastBAT[i]<-'Sig'
    } else {
      high_conf_tab$fastBAT[i]<-'Non-Sig'
    }
  }
}

# hmagma
high_conf_tab$hmagma<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE){
    if(high_conf_tab$ENSGID[i] %in% hmagma_noAstr$GENE[hmagma_noAstr$P.FDR < 0.05]){
      high_conf_tab$hmagma[i]<-'Sig'
    } else {
      high_conf_tab$hmagma[i]<-'Non-Sig'
    }
  }
}

# PsyOPS
high_conf_tab$psyops<-'NA'
for(i in 1:nrow(high_conf_tab)){
  if(high_conf_tab$ENSGID[i] %in% psyops_genes$ensembl_gene_id){
    if(high_conf_tab$ENSGID[i] %in% psyops_prioritised$ensembl_gene_id){
      high_conf_tab$psyops[i]<-'Sig'
    } else {
      high_conf_tab$psyops[i]<-'Non-Sig'
    }
  }
}

# Order genes by the number of analyses indicating them as high confidence.
high_conf_tab_log<-high_conf_tab[,c(-1, -2)]
high_conf_tab_log[high_conf_tab_log == 'NA']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Sig']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Non-Sig']<-'F'
high_conf_tab_log[high_conf_tab_log == 'Pos']<-'T'
high_conf_tab_log[high_conf_tab_log == 'Neg']<-'T'

high_conf_tab_log<-data.frame(sapply( high_conf_tab_log, as.logical))
high_conf_tab_log[is.na(high_conf_tab_log)]<-T

high_conf_tab_log$sum<-rowSums(high_conf_tab_log[,1:3])

high_conf_tab_ordered <-high_conf_tab[order(-high_conf_tab_log$sum, high_conf_tab$ID),-1]
high_conf_tab_ordered$ID<-factor(high_conf_tab_ordered$ID)

names(high_conf_tab_ordered)<-c('ID','Fine-mapping','Expression','Protein','fastBAT','H-MAGMA','PsyOPS')
high_conf_tab_melt<-melt(as.data.table(high_conf_tab_ordered), id.vars='ID')
high_conf_tab_melt$value<-factor(high_conf_tab_melt$value, levels=c('Non-Sig','Sig','Pos','Neg','NA'))
high_conf_tab_melt$ID<-factor(high_conf_tab_melt$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

library(ggplot2)
library(cowplot)

png(here::here('docs/figures/gene_con_heatmap.png'), units = 'px', res=300, height=14000, width=2800)
ggplot(data = high_conf_tab_melt, aes(x='1', y = ID, fill=value)) +
  theme_minimal_grid(color = "white") +
  geom_tile(colour = 'black', width=1.5) +
  scale_fill_manual(values=c('#FFFFFF','#33FF33','#FF6666','#3399FF','#CCCCCC'), drop=F) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    labs(x ='', y = "", title='', fill='') +
  facet_grid(~ variable) +
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        strip.background = element_rect(color="black", fill="grey95", size=0.1, linetype="solid"))
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2
## 3.4.0.
## ℹ Please use the `linewidth` argument instead.
dev.off()
## quartz_off_screen 
##                 2
Show heatmap of high confidence associations
High-confidence genes
High-confidence genes
  • Significant genes in from expression based analyses that are based on splicing data are indicated as green as direction of effect is challenging to interpret.
Gene table

Make a simple table of gene-by-method associations.

methods_genes <- dplyr::bind_rows(lapply(gene_overlap, function(x) data.frame(ENSID=x)), .id='method')
methods_genes$assoc = TRUE
genes_methods_wide <- 
tidyr::pivot_wider(dplyr::distinct(methods_genes), names_from='method', values_from='assoc', values_fill=FALSE)

genes_methods_gene_names <-
dplyr::left_join(genes_methods_wide,
  dplyr::select(Genes, ENSID=ensembl_gene_id, Gene=external_gene_name,
  Chrom_b37=chromosome_name.37, pos_start_b37=start_position.37, pos_end_b37=end_position.37,
  Chrom_b38=chromosome_name.38, pos_start_b38=start_position.38, pos_end_b38=end_position.38), by='ENSID')

genes_methods_cpid <- dplyr::select(genes_methods_gene_names, ENSID, Gene,
  Chrom_b37, pos_start_b37, pos_end_b37, Chrom_b38, pos_start_b38, pos_end_b38, 
  Nearest_gene=`Nearest Gene`, Fine_mapping=`Fine-mapping`, Expression, Protein, fastBAT, HMAGMA=`H-MAGMA`, PsyOPS)

genes_methods_cpid$chrom <- with(genes_methods_cpid, dplyr::coalesce(Chrom_b37, Chrom_b38))
genes_methods_cpid$chrom <- with(genes_methods_cpid, ifelse(chrom == 'X', yes=23, no=as.numeric(chrom)))

genes_methods_conf <-
dplyr::arrange(
  dplyr::filter(genes_methods_cpid, Nearest_gene | Fine_mapping | Expression | Protein | fastBAT | HMAGMA | PsyOPS),
  chrom, pos_start_b37, pos_start_b38
)

genes_methods_conf$chrom <- NULL
genes_methods_conf$Chrom_b38 <- paste0('chr', genes_methods_conf$Chrom_b38)

genes_methods_conf <-
dplyr::select(genes_methods_conf, ENSID, Gene,
  Nearest_gene, Fine_mapping, Expression, Protein, fastBAT, HMAGMA, PsyOPS,
  ends_with('b37'), ends_with('b38'))

readr::write_tsv(genes_methods_conf, here::here('docs/tables/genes_methods.tsv'), na='')

Compare high confidence genes across expression/protein panels and methods

This will give some indication of how fine-mapped variants may affect gene expression and protein levels, and may also give clarity for associations that have a different direction of effect in the TWAS and PWAS (which is still the case for the GOPC gene).

Show code
###
# Create a dataframe containing gene ID, PANEL, Method and Z scores for all expression and protein analyses.
###
all_func_res<-NULL

# TWAS
twas_tmp<-twas[,c('PANEL','ID','TWAS.Z'), with=F]
twas_tmp$Sig<-twas$TWAS.P < 3.685926e-08
twas_tmp$Coloc<-twas$COLOC.PP4 > 0.8
names(twas_tmp)<-c('Panel','ID','Z','Sig','Coloc')
twas_tmp$Method<-'FUSION'
twas_tmp$Type<-'Expr.'
twas_tmp$Type[grepl('SPLIC',twas_tmp$Panel)]<-'Splice'
# Retain only the most significant assoc for each gene within PANEL (only relevent for splice panel)
twas_tmp<-twas_tmp[order(-abs(twas_tmp$Z)),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$Tissue<-NA
twas_tmp$Tissue[!grepl('Adrenal|BLOOD|Blood|Thyroid',twas_tmp$Panel)]<-'Brain'
twas_tmp$Tissue[grepl('BLOOD|Blood',twas_tmp$Panel)]<-'Blood'
twas_tmp$Tissue[grepl('Adrenal|Thyroid',twas_tmp$Panel)]<-'HPA/HPT'

twas_tmp<-merge(twas_tmp, focus_sig[,c('ID','PANEL','FOCUS_pip')], by.x=c('Panel','ID'), by.y=c('PANEL','ID'), all.x=T)
twas_tmp$FOCUS[twas_tmp$FOCUS_pip > 0.5]<-T
twas_tmp$FOCUS[twas_tmp$FOCUS_pip < 0.5 | is.na(twas_tmp$FOCUS_pip)]<-F
twas_tmp<-twas_tmp[order(-twas_tmp$FOCUS_pip),]
twas_tmp<-twas_tmp[!duplicated(paste0(twas_tmp$Panel, twas_tmp$ID)),]
twas_tmp$FOCUS_pip<-NULL

all_func_res<-rbind(all_func_res, twas_tmp)

# SMR expression
smr_res_dat$Z<-smr_res_dat$b_SMR/smr_res_dat$se_SMR
smr_res_dat$Sig<-smr_res_dat$p_SMR_fdr_all < 0.05
smr_res_dat$Coloc<-smr_res_dat$p_HEIDI > 0.05
smr_tmp<-smr_res_dat[,c('eQTL_source','HGNCName','Z','Sig','Coloc'), with=F]
names(smr_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_tmp$Method<-'SMR'
smr_tmp$Type<-'Expr.'
smr_tmp$Tissue<-NA
smr_tmp$Tissue[!grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Brain'
smr_tmp$Tissue[grepl('eQTLGen_Blood',smr_tmp$Panel)]<-'Blood'
smr_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_tmp)

# PWAS
pwas_smr_rosmap_banner$rosmap.sig<-T
pwas_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
pwas_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
pwas_rosmap_tmp<-pwas_rosmap_tmp[,c('PANEL','ID','rosmap.TWAS.Z','rosmap.sig','rosmap.causal'), with=F]
names(pwas_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_rosmap_tmp<-pwas_rosmap_tmp[order(-abs(pwas_rosmap_tmp$Z)),]
pwas_rosmap_tmp<-pwas_rosmap_tmp[!duplicated(paste0(pwas_rosmap_tmp$Panel, pwas_rosmap_tmp$ID)),]
pwas_rosmap_tmp$Method<-'FUSION'
pwas_rosmap_tmp$Type<-'Protein'
pwas_rosmap_tmp$Tissue<-'Brain'
pwas_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_rosmap_tmp)

pwas_banner_tmp<-pwas_smr_rosmap_banner[,c('ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
pwas_banner_tmp$PANEL<-'Banner et al. DLPFC'
pwas_banner_tmp<-pwas_banner_tmp[,c('PANEL','ID','banner.TWAS.Z','banner_replicated','banner.causal'), with=F]
names(pwas_banner_tmp)<-c('Panel','ID','Z','Sig','Coloc')
pwas_banner_tmp<-pwas_banner_tmp[order(-abs(pwas_banner_tmp$Z)),]
pwas_banner_tmp<-pwas_banner_tmp[!duplicated(paste0(pwas_banner_tmp$Panel, pwas_banner_tmp$ID)),]
pwas_banner_tmp$Method<-'FUSION'
pwas_banner_tmp$Type<-'Protein'
pwas_banner_tmp$Tissue<-'Brain'
pwas_banner_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, pwas_banner_tmp)

# SMR protein
pwas_smr_rosmap_banner$z_SMR<-abs(qnorm(pwas_smr_rosmap_banner$p_SMR/2))
pwas_smr_rosmap_banner$z_SMR<-sign(pwas_smr_rosmap_banner$b_SMR)*pwas_smr_rosmap_banner$z_SMR

smr_rosmap_tmp<-pwas_smr_rosmap_banner[,c('ID','z_SMR','smr_replicated','smr.causal'), with=F]
smr_rosmap_tmp$PANEL<-'ROSMAP DLPFC'
smr_rosmap_tmp<-smr_rosmap_tmp[,c('PANEL','ID','z_SMR','smr_replicated','smr.causal'), with=F]
names(smr_rosmap_tmp)<-c('Panel','ID','Z','Sig','Coloc')
smr_rosmap_tmp<-smr_rosmap_tmp[order(-abs(smr_rosmap_tmp$Z)),]
smr_rosmap_tmp<-smr_rosmap_tmp[!duplicated(paste0(smr_rosmap_tmp$Panel, smr_rosmap_tmp$ID)),]
smr_rosmap_tmp$Method<-'SMR'
smr_rosmap_tmp$Type<-'Protein'
smr_rosmap_tmp$Tissue<-'Brain'
smr_rosmap_tmp$FOCUS<-F

all_func_res<-rbind(all_func_res, smr_rosmap_tmp)

# Subset to high confidence genes
high_conf_id<-Genes$external_gene_name[Genes$ensembl_gene_id %in% high_conf]
all_func_res<-all_func_res[all_func_res$ID %in% high_conf_id,]

# Remove missing values
all_func_res<-all_func_res[complete.cases(all_func_res),]

all_func_res$Group<-paste0(all_func_res$Tissue,'\n',all_func_res$Method,'\n',all_func_res$Type )
all_func_res$Group<-factor(all_func_res$Group, levels=c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr."))

all_func_res$ID<-factor(all_func_res$ID, levels=rev(levels(high_conf_tab_ordered$ID)))

all_func_res$Panel[all_func_res$Panel == "Adrenal_Gland"]<-'GTeX Adrenal Gland'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellar_Hemisphere"]<-'GTeX Cerebellar Hemisphere'
all_func_res$Panel[all_func_res$Panel == "Brain_Cerebellum"]<-'GTeX Cerebellum'
all_func_res$Panel[all_func_res$Panel == "Brain_Hypothalamus"]<-'GTeX Hypothalamus'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "CMC.BRAIN.RNASEQ_SPLICING"]<-'CMC DLPFC'
all_func_res$Panel[all_func_res$Panel == "Pituitary"]<-'GTeX Pituitary'
all_func_res$Panel[all_func_res$Panel == "PsychENCODE"]<-'PsychENCODE DLPFC'
all_func_res$Panel[all_func_res$Panel == "Whole_Blood"]<-'GTeX'
all_func_res$Panel[all_func_res$Panel == "NTR.BLOOD.RNAARR"]<-'NTR'
all_func_res$Panel[all_func_res$Panel == "Thyroid"]<-'GTeX Thyroid'
all_func_res$Panel[all_func_res$Panel == "Brain_Caudate_basal_ganglia"]<-'GTeX Caudate basalganglia'
all_func_res$Panel[all_func_res$Panel == "YFS.BLOOD.RNAARR"]<-'YFS'
all_func_res$Panel[all_func_res$Panel == "Brain_Cortex"]<-'GTeX Cortex'
all_func_res$Panel[all_func_res$Panel == "Brain_Frontal_Cortex_BA9"]<-'GTeX Frontal Cortex BA9'
all_func_res$Panel[all_func_res$Panel == "Brain_Hippocampus"]<-'GTeX Hippocampus'
all_func_res$Panel[all_func_res$Panel == "Brain_Amygdala"]<-'GTeX Amygdala'
all_func_res$Panel[all_func_res$Panel == "Brain_Anterior_cingulate_cortex_BA24"]<-'GTeX Anterior cingulate cortex BA24'
all_func_res$Panel[all_func_res$Panel == "Brain_Substantia_nigra"]<-'GTeX Substantia nigra'
all_func_res$Panel[all_func_res$Panel == "Brain_Nucleus_accumbens_basal_ganglia"]<-'GTeX Nucleus accumbens basalganglia'
all_func_res$Panel[all_func_res$Panel == "Brain_Putamen_basal_ganglia"]<-'GTeX Nucleus Putamen basalganglia'
all_func_res$Panel[all_func_res$Panel == "eQTLGen_Blood"]<-'eQTLGen'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cerebellum"]<-'MetaBrain Cerebellum'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Basalganglia"]<-'MetaBrain Basalganglia'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Cortex"]<-'MetaBrain Cortex'
all_func_res$Panel[all_func_res$Panel == "MetaBrain_Hippocampus"]<-'MetaBrain Hippocampus'

all_func_res$Panel<-factor(all_func_res$Panel, levels=c("GTeX Adrenal Gland" ,"GTeX Amygdala" ,"GTeX Anterior cingulate cortex BA24" ,"GTeX Caudate basalganglia" ,"GTeX Cerebellar Hemisphere" ,"GTeX Cerebellum" ,"GTeX Cortex" ,"GTeX Frontal Cortex BA9" ,"GTeX Hippocampus" ,"GTeX Hypothalamus", "GTeX Nucleus accumbens basalganglia","GTeX Nucleus Putamen basalganglia" ,"GTeX Pituitary",'GTeX Substantia nigra' ,"GTeX Thyroid","CMC DLPFC", "PsychENCODE DLPFC", "GTeX" ,"NTR" ,"YFS", "eQTLGen" ,'MetaBrain Basalganglia',"MetaBrain Cerebellum" ,"MetaBrain Cortex" , 'MetaBrain Hippocampus',"ROSMAP DLPFC" ,"Banner et al. DLPFC"))


# Create heatmap
library(ggplot2)

heatmap<-ggplot(data = all_func_res, aes(x = Panel, y = ID)) +
    theme_bw()  +
    geom_tile(aes(fill = Z), width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='black', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T,], aes(x = Panel, y = ID), colour='green2', fill=NA, size=0.3, width=0.95, height=0.95) +
  geom_tile(data=all_func_res[all_func_res$Coloc ==T & all_func_res$Sig == T & all_func_res$FOCUS == T,], aes(x = Panel, y = ID), colour='magenta2', fill=NA, size=0.3, width=0.95, height=0.95) +
    scale_fill_gradientn(colours=c("dodgerblue2","white","red"), na.value = 'white',name = "Z-score") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),plot.title = element_text(hjust = 0.5)) +
    geom_text(aes(label=round(Z,1)), color="black", size=3) +
    labs(title="High confidence genes across panels and methods") +
  facet_wrap(~ Group , nrow=1, scales = "free_x")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
group_siz<-NULL
for(i in c("Brain\nFUSION\nExpr.","Brain\nFUSION\nSplice","Brain\nSMR\nExpr.","Brain\nFUSION\nProtein","Brain\nSMR\nProtein","Blood\nFUSION\nExpr.","Blood\nSMR\nExpr.","HPA/HPT\nFUSION\nExpr.")){
  group_siz<-rbind(group_siz, data.frame(Group=i,
                           Size=length(unique(all_func_res$Panel[all_func_res$Group==i]))))
}

# Set minimum size to 3 to allow space for labels
group_siz$Size[group_siz$Size < 2]<-2
group_siz$Prop<-group_siz$Size/sum(group_siz$Size)
group_siz$Width<-4*group_siz$Prop

library(grid)
gt = ggplot_gtable(ggplot_build(heatmap))

for(i in 1:nrow(group_siz)){
  gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]] = group_siz$Width[i]*gt$widths[gt$layout$l[grep(paste0('panel-',i,'-1'), gt$layout$name)]]

}

png('../../docs/figures/gene_con_func_heatmap.png', units = 'px', res=300, height=8000, width=3200)

grid.draw(gt)

a<-dev.off()
Show heatmap of high confidence associations across expression and protein datasets and methods
High-confidence genes
High-confidence genes
  • Black box indicates significance
  • Green box indicates colocalisation
  • Magenta box indicates FOCUS PIP > 0.5
  • Some MetaBrain Basalganglia, Hippocampus, Spinalcord panels are not present due to not containing any high confidence genes
  • Banner PWAS and ROSMAP SMR results only shown for genes that were signficant in the discovery ROSMAP PWAS.

Query GO terms using PANTHER

Conduct an overrepresentation test with PANTHER using the PANTHER API. The service returns results as JSON. The first step is querying the IDs for humans (the taxon) and the annotation datasets.

Show code
library(httr)
library(jsonlite)

# the PantherDB URL
panther_db <- "http://pantherdb.org"

# Get list of taxon IDs
supportedgenomes_response <- GET(modify_url(panther_db, path="/services/oai/pantherdb/supportedgenomes"))

# find the taxon ID for humans
supportedgenomes <- fromJSON(content(supportedgenomes_response, "text"))
genomes <- supportedgenomes$search$output$genomes$genome
human_taxon_id <- genomes$taxon_id[which(genomes$name == 'human')]

# get list of annotation datasets
supportedannotdatasets_response <- GET(modify_url(panther_db, path="services/oai/pantherdb/supportedannotdatasets"))

# find GO ids for biological process, molecular function, and cellular components
supportedannotdatasets <- fromJSON(content(supportedannotdatasets_response, "text"))
annotation_data_types <- supportedannotdatasets$search$annotation_data_sets$annotation_data_type
biological_process_id = annotation_data_types$id[which(annotation_data_types$label == "biological_process")]
cellular_component_id = annotation_data_types$id[which(annotation_data_types$label == "cellular_component")]
molecular_function_id = annotation_data_types$id[which(annotation_data_types$label == "molecular_function")]

The next step is to query the overrepresentation test.

Show code
# construct enrichment overrepresentation query from gene list
# and annotation ID
overrep_url <- function(gene_list, annot_data_set_id, url=panther_db, organism_id=human_taxon_id) {
    modify_url(url,
        path="/services/oai/pantherdb/enrich/overrep",
        query=list(geneInputList=paste(gene_list, collapse=","),
                   organism=organism_id,
                   annotDataSet=annot_data_set_id,
                   enrichmentTestType="FISHER",
                   correction="FDR"))
}

# construct URL and query PANTHER. Parse out JSON response
overrep_query <- function(genes, annot_data_set_id, ...) {
    # make query
    overrep_response <- GET(overrep_url(genes, annot_data_set_id, ...))
    # parse JSON
    overrep <- fromJSON(content(overrep_response, "text"))
    return(overrep)
}

high_conf_overrep_biol <- overrep_query(high_conf, biological_process_id)

high_conf_overrep_mol <- overrep_query(high_conf, molecular_function_id)

high_conf_overrep_cell<- overrep_query(high_conf, cellular_component_id)

Biological processes

Show code
# extract results table from the query
panther_format <- function(query) {
    results <- query$results$result
    results_labels <- results$term
    results_terms <- cbind(results_labels,
                           results[,c('fold_enrichment', 'fdr',
                                      'number_in_list', 'expected', 
                                      'number_in_reference', 'pValue')])
                                      
    return(results_terms)
}

high_conf_overrep_biol_results <- panther_format(high_conf_overrep_biol)
high_conf_overrep_mol_results <- panther_format(high_conf_overrep_mol)
high_conf_overrep_cell_results <- panther_format(high_conf_overrep_cell)
Show Go: Biological Processes table
# filter for FDR
knitr::kable(high_conf_overrep_biol_results[which(high_conf_overrep_biol_results$fdr <= 0.05),], caption='GO: Biological Processes')
GO: Biological Processes
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0007399 nervous system development 2.6788053 0.0000000 63 23.5179465 2191 0.0000000
GO:0007275 multicellular organism development 1.8729532 0.0000119 85 45.3828744 4228 0.0000000
GO:0048731 system development 1.9176313 0.0000171 79 41.1966584 3838 0.0000000
GO:0048856 anatomical structure development 1.7024324 0.0000767 94 55.2151149 5144 0.0000000
GO:0032502 developmental process 1.6246480 0.0001799 99 60.9362767 5677 0.0000001
GO:0022008 neurogenesis 2.6721141 0.0001651 37 13.8467143 1290 0.0000001
GO:0050807 regulation of synapse organization 6.2108597 0.0002931 14 2.2541163 210 0.0000001
GO:0065008 regulation of biological quality 1.8242395 0.0002813 72 39.4685026 3677 0.0000001
GO:0050803 regulation of synapse structure or activity 6.0383358 0.0003156 14 2.3185196 216 0.0000002
GO:0032501 multicellular organismal process 1.5288851 0.0003363 108 70.6397105 6581 0.0000002
GO:0048699 generation of neurons 2.7110896 0.0003738 33 12.1722279 1134 0.0000003
GO:0007417 central nervous system development 2.7930849 0.0004436 31 11.0988392 1034 0.0000003
GO:0099537 trans-synaptic signaling 4.1069490 0.0004480 19 4.6263053 431 0.0000004
GO:0007416 synapse assembly 8.6261941 0.0005981 10 1.1592598 108 0.0000005
GO:0120035 regulation of plasma membrane bounded cell projection organization 3.3480416 0.0007144 23 6.8696877 640 0.0000007
GO:0099536 synaptic signaling 3.8396855 0.0009605 19 4.9483219 461 0.0000010
GO:0031344 regulation of cell projection organization 3.2663820 0.0009485 23 7.0414299 656 0.0000010
GO:0007420 brain development 3.0052547 0.0012804 25 8.3187624 775 0.0000015
GO:0050808 synapse organization 4.6426692 0.0012925 15 3.2309000 301 0.0000016
GO:0032879 regulation of localization 2.0811103 0.0014213 47 22.5840983 2104 0.0000018
GO:0030182 neuron differentiation 2.6243069 0.0013938 30 11.4315897 1065 0.0000019
GO:0050804 modulation of chemical synaptic transmission 3.8286122 0.0014162 18 4.7014425 438 0.0000020
GO:0098742 cell-cell adhesion via plasma-membrane adhesion molecules 4.8849459 0.0013649 14 2.8659478 267 0.0000020
GO:0099177 regulation of trans-synaptic signaling 3.8198909 0.0013388 18 4.7121764 439 0.0000020
GO:0031346 positive regulation of cell projection organization 4.2467417 0.0013281 16 3.7675943 351 0.0000021
GO:0010975 regulation of neuron projection development 3.7683868 0.0014843 18 4.7765797 445 0.0000025
GO:0098609 cell-cell adhesion 3.4314142 0.0015397 20 5.8285007 543 0.0000027
GO:0009653 anatomical structure morphogenesis 1.9990250 0.0020335 48 24.0117053 2237 0.0000036
GO:0000902 cell morphogenesis 3.0052547 0.0021488 23 7.6532615 713 0.0000040
GO:0060322 head development 2.8299786 0.0021548 25 8.8339890 823 0.0000041
GO:0048523 negative regulation of cellular process 1.5947157 0.0029075 81 50.7927534 4732 0.0000057
GO:0048812 neuron projection morphogenesis 3.5009021 0.0032162 18 5.1415319 479 0.0000066
GO:0120039 plasma membrane bounded cell projection morphogenesis 3.4647358 0.0035752 18 5.1952013 484 0.0000075
GO:0007155 cell adhesion 2.5958702 0.0035846 27 10.4011365 969 0.0000078
GO:0048858 cell projection morphogenesis 3.4293091 0.0038571 18 5.2488708 489 0.0000086
GO:0034762 regulation of transmembrane transport 3.1420876 0.0040816 20 6.3651950 593 0.0000094
GO:0034330 cell junction organization 3.3673336 0.0046289 18 5.3454757 498 0.0000109
GO:0010976 positive regulation of neuron projection development 6.0105094 0.0045767 10 1.6637525 155 0.0000111
GO:0034329 cell junction assembly 4.3881074 0.0056331 13 2.9625528 276 0.0000140
GO:0032990 cell part morphogenesis 3.3010475 0.0055409 18 5.4528146 508 0.0000141
GO:0034765 regulation of ion transmembrane transport 3.2881022 0.0056871 18 5.4742824 510 0.0000149
GO:0007268 chemical synaptic transmission 3.6004984 0.0058157 16 4.4438292 414 0.0000156
GO:0098916 anterograde trans-synaptic signaling 3.6004984 0.0056804 16 4.4438292 414 0.0000156
GO:0048513 animal organ development 1.7178162 0.0064007 60 34.9280684 3254 0.0000180
GO:0051049 regulation of transport 2.0573912 0.0063630 39 18.9560445 1766 0.0000183
GO:0048666 neuron development 2.6523244 0.0062673 24 9.0486668 843 0.0000184
GO:0007156 homophilic cell adhesion via plasma membrane adhesion molecules 5.4801703 0.0078785 10 1.8247608 170 0.0000236
GO:0050793 regulation of developmental process 1.8340626 0.0091952 49 26.7166448 2489 0.0000281
GO:0001764 neuron migration 6.0321300 0.0095528 9 1.4920103 139 0.0000299
GO:0043269 regulation of ion transport 2.7633062 0.0111401 21 7.5995920 708 0.0000355
GO:0048519 negative regulation of biological process 1.4901856 0.0121250 85 57.0398757 5314 0.0000394
GO:0007610 behavior 2.9209489 0.0124983 19 6.5047355 606 0.0000414
GO:0051252 regulation of RNA metabolic process 1.6139627 0.0153932 65 40.2735441 3752 0.0000520
GO:0016477 cell migration 2.4760903 0.0155100 24 9.6927000 903 0.0000534
GO:0050890 cognition 3.8205604 0.0158478 13 3.4026422 317 0.0000556
GO:0050794 regulation of cellular process 1.2491673 0.0157799 150 120.0799942 11187 0.0000564
NA UNCLASSIFIED 0.3760704 0.0226496 11 29.2498421 2725 0.0000823
GO:0008150 biological_process 1.0951751 0.0222591 210 191.7501579 17864 0.0000823
GO:0099175 regulation of postsynapse organization 7.1663766 0.0224651 7 0.9767837 91 0.0000845
GO:1900244 positive regulation of synaptic vesicle endocytosis 46.5814480 0.0247443 3 0.0644033 6 0.0000947
GO:0030154 cell differentiation 1.6149294 0.0265731 61 37.7725484 3519 0.0001034
GO:0048522 positive regulation of cellular process 1.4459144 0.0263692 88 60.8611394 5670 0.0001043
GO:0048667 cell morphogenesis involved in neuron differentiation 3.1760078 0.0280498 15 4.7229103 440 0.0001127
GO:0050789 regulation of biological process 1.2231280 0.0291233 155 126.7242702 11806 0.0001189
GO:0019219 regulation of nucleobase-containing compound metabolic process 1.5576781 0.0290954 68 43.6547185 4067 0.0001206
GO:0032989 cellular component morphogenesis 2.7763777 0.0292931 18 6.4832678 604 0.0001233
GO:0032409 regulation of transporter activity 3.7389791 0.0305451 12 3.2094322 299 0.0001305
GO:0048468 cell development 1.9442691 0.0304258 36 18.5159551 1725 0.0001319
GO:0016339 calcium-dependent cell-cell adhesion via plasma membrane cell adhesion molecules 11.0908209 0.0313011 5 0.4508233 42 0.0001377
GO:0048869 cellular developmental process 1.6044429 0.0340559 61 38.0194278 3542 0.0001520
GO:0099054 presynapse assembly 16.9387084 0.0340189 4 0.2361455 22 0.0001540
GO:0010468 regulation of gene expression 1.4775578 0.0373606 77 52.1130215 4855 0.0001715
GO:0051963 regulation of synapse assembly 6.3314589 0.0375147 7 1.1055904 103 0.0001746
GO:0051128 regulation of cellular component organization 1.7734054 0.0379139 45 25.3749089 2364 0.0001789
GO:0044057 regulation of system process 2.7883261 0.0379270 17 6.0968478 568 0.0001814
GO:1903423 positive regulation of synaptic vesicle recycling 34.9360860 0.0377736 3 0.0858711 8 0.0001831
GO:0007267 cell-cell signaling 2.2365977 0.0410147 26 11.6247997 1083 0.0002014
GO:0001505 regulation of neurotransmitter levels 4.1777083 0.0408758 10 2.3936568 223 0.0002033
GO:0032412 regulation of ion transmembrane transporter activity 3.7676171 0.0462954 11 2.9196173 272 0.0002332
GO:2001257 regulation of cation channel activity 4.5322490 0.0466457 9 1.9857691 185 0.0002380
GO:0007611 learning or memory 3.7265158 0.0494273 11 2.9518189 275 0.0002553
Show Go: Molecular functions table
# filter for FDR
knitr::kable(high_conf_overrep_mol_results[which(high_conf_overrep_mol_results$fdr <= 0.05),], caption='GO: Molecular Functions')
GO: Molecular Functions
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
Show Go: Cellular Components table
# filter for FDR
knitr::kable(high_conf_overrep_cell_results[which(high_conf_overrep_cell_results$fdr <= 0.05),], caption='GO: Cellular Components')
GO: Cellular Components
id label fold_enrichment fdr number_in_list expected number_in_reference pValue
GO:0045202 synapse 3.493609 0.0000000 51 14.5980864 1360 0.0000000
GO:0043005 neuron projection 3.408399 0.0000000 51 14.9630385 1394 0.0000000
GO:0030054 cell junction 2.616935 0.0000000 60 22.9275827 2136 0.0000000
GO:0030425 dendrite 4.443381 0.0000000 30 6.7516149 629 0.0000000
GO:0097447 dendritic tree 4.429298 0.0000000 30 6.7730827 631 0.0000000
GO:0036477 somatodendritic compartment 3.787110 0.0000000 35 9.2418767 861 0.0000000
GO:0098794 postsynapse 4.315853 0.0000000 29 6.7194133 626 0.0000000
GO:0042995 cell projection 2.337839 0.0000001 60 25.6647239 2391 0.0000000
GO:0030424 axon 4.019355 0.0000002 28 6.9662927 649 0.0000000
GO:0120025 plasma membrane bounded cell projection 2.327031 0.0000003 57 24.4947302 2282 0.0000000
GO:0098984 neuron to neuron synapse 5.219210 0.0000008 20 3.8319977 357 0.0000000
GO:0098793 presynapse 4.242712 0.0000009 24 5.6567585 527 0.0000000
GO:0097060 synaptic membrane 4.864903 0.0000020 20 4.1110787 383 0.0000000
GO:0098978 glutamatergic synapse 4.990869 0.0000071 18 3.6065860 336 0.0000000
GO:0030426 growth cone 7.166377 0.0000107 13 1.8140269 169 0.0000001
GO:0030427 site of polarized growth 6.920672 0.0000147 13 1.8784302 175 0.0000001
GO:0014069 postsynaptic density 4.858188 0.0000202 17 3.4992472 326 0.0000002
GO:0032279 asymmetric synapse 4.770389 0.0000244 17 3.5636505 332 0.0000002
GO:0099572 postsynaptic specialization 4.564177 0.0000418 17 3.7246588 347 0.0000004
GO:0099240 intrinsic component of synaptic membrane 6.615117 0.0000574 12 1.8140269 169 0.0000006
GO:0070161 anchoring junction 2.440645 0.0001421 35 14.3404731 1336 0.0000015
GO:0031224 intrinsic component of membrane 1.532387 0.0001403 98 63.9524989 5958 0.0000015
GO:0099699 integral component of synaptic membrane 6.527337 0.0001685 11 1.6852203 157 0.0000019
GO:0045211 postsynaptic membrane 4.777584 0.0002181 14 2.9303512 273 0.0000026
GO:0150034 distal axon 4.641568 0.0002882 14 3.0162223 281 0.0000035
GO:0043197 dendritic spine 5.822681 0.0004205 11 1.8891641 176 0.0000054
GO:0044309 neuron spine 5.757258 0.0004483 11 1.9106319 178 0.0000059
GO:0098839 postsynaptic density membrane 8.190145 0.0007770 8 0.9767837 91 0.0000106
GO:0016021 integral component of membrane 1.461946 0.0031332 91 62.2458109 5799 0.0000445
GO:0098797 plasma membrane protein complex 2.694794 0.0034360 21 7.7928020 726 0.0000505
GO:0098590 plasma membrane region 2.223458 0.0035154 30 13.4924960 1257 0.0000533
GO:0099634 postsynaptic specialization membrane 6.370113 0.0036900 8 1.2558648 117 0.0000578
GO:0005886 plasma membrane 1.433995 0.0045491 92 64.1564428 5977 0.0000735
GO:0071944 cell periphery 1.398886 0.0063861 97 69.3409102 6460 0.0001063
GO:0031225 anchored component of membrane 4.903310 0.0079229 9 1.8354947 171 0.0001357
GO:0043025 neuronal cell body 2.951696 0.0085727 16 5.4206129 505 0.0001511
GO:0005891 voltage-gated calcium channel complex 10.351433 0.0102367 5 0.4830249 45 0.0001854
GO:0044297 cell body 2.759180 0.0109944 17 6.1612512 574 0.0002045
GO:0044295 axonal growth cone 14.906063 0.0125030 4 0.2683472 25 0.0002387
GO:0005794 Golgi apparatus 1.909306 0.0145251 34 17.8075186 1659 0.0002844
GO:0016020 membrane 1.253900 0.0158242 134 106.8665792 9956 0.0003176
GO:0099061 integral component of postsynaptic density membrane 8.957971 0.0167807 5 0.5581621 52 0.0003450
GO:0098889 intrinsic component of presynaptic membrane 6.734667 0.0177693 6 0.8909126 83 0.0003740
GO:0099055 integral component of postsynaptic membrane 5.480170 0.0186521 7 1.2773326 119 0.0004017
GO:0099146 intrinsic component of postsynaptic density membrane 8.318116 0.0215016 5 0.6010977 56 0.0004736
GO:0016342 catenin complex 12.021019 0.0222330 4 0.3327505 31 0.0005006
GO:0098936 intrinsic component of postsynaptic membrane 5.217122 0.0231041 7 1.3417359 125 0.0005315
GO:0034702 ion channel complex 3.404624 0.0226476 11 3.2309000 301 0.0005321
GO:0120111 neuron projection cytoplasm 6.210860 0.0233944 6 0.9660498 90 0.0005611
GO:0005798 Golgi-associated vesicle 6.075841 0.0255794 6 0.9875176 92 0.0006260
GO:0034703 cation channel complex 3.743152 0.0359033 9 2.4043907 224 0.0008963
GO:0043198 dendritic shaft 9.806621 0.0396741 4 0.4078877 38 0.0010098
GO:0000785 chromatin 1.968230 0.0415101 27 13.7179076 1278 0.0010769
GO:0034704 calcium channel complex 6.560767 0.0489340 5 0.7621060 71 0.0012934
GO:0099568 cytoplasmic region 3.246094 0.0497101 10 3.0806256 287 0.0013383